lifeExpectancy <- read.csv('/Users/laurenflemmer/Desktop/life_expectancy_proj/Life Expectancy Data.csv')
lifeExpectancy <- lifeExpectancy %>% drop_na()
head(lifeExpectancy, 5)
## Country Year Status Life.expectancy Adult.Mortality infant.deaths
## 1 Afghanistan 2015 Developing 65.0 263 62
## 2 Afghanistan 2014 Developing 59.9 271 64
## 3 Afghanistan 2013 Developing 59.9 268 66
## 4 Afghanistan 2012 Developing 59.5 272 69
## 5 Afghanistan 2011 Developing 59.2 275 71
## Alcohol percentage.expenditure Hepatitis.B Measles BMI under.five.deaths
## 1 0.01 71.279624 65 1154 19.1 83
## 2 0.01 73.523582 62 492 18.6 86
## 3 0.01 73.219243 64 430 18.1 89
## 4 0.01 78.184215 67 2787 17.6 93
## 5 0.01 7.097109 68 3013 17.2 97
## Polio Total.expenditure Diphtheria HIV.AIDS GDP Population
## 1 6 8.16 65 0.1 584.25921 33736494
## 2 58 8.18 62 0.1 612.69651 327582
## 3 62 8.13 64 0.1 631.74498 31731688
## 4 67 8.52 67 0.1 669.95900 3696958
## 5 68 7.87 68 0.1 63.53723 2978599
## thinness..1.19.years thinness.5.9.years Income.composition.of.resources
## 1 17.2 17.3 0.479
## 2 17.5 17.5 0.476
## 3 17.7 17.7 0.470
## 4 17.9 18.0 0.463
## 5 18.2 18.2 0.454
## Schooling
## 1 10.1
## 2 10.0
## 3 9.9
## 4 9.8
## 5 9.5
colSums(is.na(lifeExpectancy))
## Country Year
## 0 0
## Status Life.expectancy
## 0 0
## Adult.Mortality infant.deaths
## 0 0
## Alcohol percentage.expenditure
## 0 0
## Hepatitis.B Measles
## 0 0
## BMI under.five.deaths
## 0 0
## Polio Total.expenditure
## 0 0
## Diphtheria HIV.AIDS
## 0 0
## GDP Population
## 0 0
## thinness..1.19.years thinness.5.9.years
## 0 0
## Income.composition.of.resources Schooling
## 0 0
par(mfrow=c(2,2))
# diagnostic plots
basic_model <- lm(Life.expectancy ~ ., data = lifeExpectancy)
plot(basic_model)
# plot response
ggplot(data=lifeExpectancy) +
geom_histogram(mapping=aes(x=Life.expectancy), bins=10) +
ggtitle("Distribution of life expectancy")
ggplot(data=lifeExpectancy) +
geom_boxplot(mapping=aes(x=Country, y=Life.expectancy, color=Country), bins=10) +
ggtitle("Life Expectancy by Country") +
theme(legend.position = "none") +
coord_flip() +
theme(axis.text.x = element_text(angle = 80, vjust = 0.5, hjust=1, size=0.01))
# correlation plot
# include only continuous vars
# removed percentage expenditure bc redundant
continuous_vars <- lifeExpectancy %>% select(Year, Life.expectancy,
Adult.Mortality, infant.deaths,
Alcohol, Hepatitis.B, Measles, BMI,
under.five.deaths, Polio, Total.expenditure,
Diphtheria, HIV.AIDS, GDP, Population,
thinness..1.19.years, thinness.5.9.years,
Income.composition.of.resources, Schooling)
corMat <- cor(continuous_vars)
corrplot(corMat, type="lower", method="circle", tl.cex = 0.75)
corMat
## Year Life.expectancy Adult.Mortality
## Year 1.000000000 0.05077103 -0.037091782
## Life.expectancy 0.050771035 1.00000000 -0.702523062
## Adult.Mortality -0.037091782 -0.70252306 1.000000000
## infant.deaths 0.008029128 -0.16907380 0.042450237
## Alcohol -0.113364764 0.40271832 -0.175535086
## Hepatitis.B 0.114897092 0.19993528 -0.105225443
## Measles -0.053822046 -0.06888122 -0.003966685
## BMI 0.005739061 0.54204159 -0.351542478
## under.five.deaths 0.010478594 -0.19226530 0.060365026
## Polio -0.016698803 0.32729440 -0.199853000
## Total.expenditure 0.059492777 0.17471764 -0.085226535
## Diphtheria 0.029640586 0.34133123 -0.191428759
## HIV.AIDS -0.123404990 -0.59223629 0.550690745
## GDP 0.096421485 0.44132181 -0.255034733
## Population 0.012566893 -0.02230498 -0.015011838
## thinness..1.19.years 0.019756611 -0.45783819 0.272230044
## thinness.5.9.years 0.014122422 -0.45750829 0.286722882
## Income.composition.of.resources 0.122891780 0.72108259 -0.442203288
## Schooling 0.088731787 0.72763003 -0.421170523
## infant.deaths Alcohol Hepatitis.B
## Year 0.008029128 -0.11336476 0.11489709
## Life.expectancy -0.169073804 0.40271832 0.19993528
## Adult.Mortality 0.042450237 -0.17553509 -0.10522544
## infant.deaths 1.000000000 -0.10621692 -0.23176894
## Alcohol -0.106216917 1.00000000 0.10988939
## Hepatitis.B -0.231768937 0.10988939 1.00000000
## Measles 0.532679832 -0.05011023 -0.12479999
## BMI -0.234425154 0.35339621 0.14330179
## under.five.deaths 0.996905622 -0.10108216 -0.24076603
## Polio -0.156928805 0.24031453 0.46333080
## Total.expenditure -0.146951117 0.21488509 0.11332668
## Diphtheria -0.161871004 0.24295143 0.58898993
## HIV.AIDS 0.007711547 -0.02711264 -0.09480197
## GDP -0.098092020 0.44343279 0.04184950
## Population 0.671758310 -0.02888023 -0.12972265
## thinness..1.19.years 0.463415256 -0.40375499 -0.12940595
## thinness.5.9.years 0.461907925 -0.38620819 -0.13325099
## Income.composition.of.resources -0.134753863 0.56107433 0.18492097
## Schooling -0.214371900 0.61697481 0.21518159
## Measles BMI under.five.deaths
## Year -0.053822046 0.005739061 0.01047859
## Life.expectancy -0.068881222 0.542041588 -0.19226530
## Adult.Mortality -0.003966685 -0.351542478 0.06036503
## infant.deaths 0.532679832 -0.234425154 0.99690562
## Alcohol -0.050110235 0.353396205 -0.10108216
## Hepatitis.B -0.124799993 0.143301786 -0.24076603
## Measles 1.000000000 -0.153245464 0.51750556
## BMI -0.153245464 1.000000000 -0.24213740
## under.five.deaths 0.517505563 -0.242137398 1.00000000
## Polio -0.057850133 0.186267965 -0.17116419
## Total.expenditure -0.113582738 0.189468964 -0.14580310
## Diphtheria -0.058605907 0.176294503 -0.17844819
## HIV.AIDS -0.003521854 -0.210896746 0.01947593
## GDP -0.064767590 0.266113973 -0.10033126
## Population 0.321946377 -0.081415982 0.65867969
## thinness..1.19.years 0.180641506 -0.547017514 0.46478470
## thinness.5.9.years 0.174946217 -0.554093981 0.46228938
## Income.composition.of.resources -0.058277256 0.510504831 -0.14809728
## Schooling -0.115660481 0.554843903 -0.22601262
## Polio Total.expenditure Diphtheria
## Year -0.01669880 0.05949278 0.02964059
## Life.expectancy 0.32729440 0.17471764 0.34133123
## Adult.Mortality -0.19985300 -0.08522653 -0.19142876
## infant.deaths -0.15692881 -0.14695112 -0.16187100
## Alcohol 0.24031453 0.21488509 0.24295143
## Hepatitis.B 0.46333080 0.11332668 0.58898993
## Measles -0.05785013 -0.11358274 -0.05860591
## BMI 0.18626797 0.18946896 0.17629450
## under.five.deaths -0.17116419 -0.14580310 -0.17844819
## Polio 1.00000000 0.11976798 0.60924547
## Total.expenditure 0.11976798 1.00000000 0.12991481
## Diphtheria 0.60924547 0.12991481 1.00000000
## HIV.AIDS -0.10788547 0.04310066 -0.11760107
## GDP 0.15680869 0.18037347 0.15843774
## Population -0.04538657 -0.07996224 -0.03989754
## thinness..1.19.years -0.16406959 -0.20987232 -0.18724165
## thinness.5.9.years -0.17448925 -0.21786479 -0.18095238
## Income.composition.of.resources 0.31468159 0.18365319 0.34326177
## Schooling 0.35014660 0.24378345 0.35039793
## HIV.AIDS GDP Population
## Year -0.123404990 0.09642148 0.012566893
## Life.expectancy -0.592236293 0.44132181 -0.022304978
## Adult.Mortality 0.550690745 -0.25503473 -0.015011838
## infant.deaths 0.007711547 -0.09809202 0.671758310
## Alcohol -0.027112636 0.44343279 -0.028880232
## Hepatitis.B -0.094801971 0.04184950 -0.129722655
## Measles -0.003521854 -0.06476759 0.321946377
## BMI -0.210896746 0.26611397 -0.081415982
## under.five.deaths 0.019475927 -0.10033126 0.658679691
## Polio -0.107885468 0.15680869 -0.045386572
## Total.expenditure 0.043100657 0.18037347 -0.079962237
## Diphtheria -0.117601074 0.15843774 -0.039897537
## HIV.AIDS 1.000000000 -0.10808060 -0.027800562
## GDP -0.108080600 1.00000000 -0.020368964
## Population -0.027800562 -0.02036896 1.000000000
## thinness..1.19.years 0.172591767 -0.27749835 0.282529280
## thinness.5.9.years 0.183146727 -0.27795855 0.277913374
## Income.composition.of.resources -0.248589855 0.44685551 -0.008132466
## Schooling -0.211840201 0.46794697 -0.040312419
## thinness..1.19.years thinness.5.9.years
## Year 0.01975661 0.01412242
## Life.expectancy -0.45783819 -0.45750829
## Adult.Mortality 0.27223004 0.28672288
## infant.deaths 0.46341526 0.46190792
## Alcohol -0.40375499 -0.38620819
## Hepatitis.B -0.12940595 -0.13325099
## Measles 0.18064151 0.17494622
## BMI -0.54701751 -0.55409398
## under.five.deaths 0.46478470 0.46228938
## Polio -0.16406959 -0.17448925
## Total.expenditure -0.20987232 -0.21786479
## Diphtheria -0.18724165 -0.18095238
## HIV.AIDS 0.17259177 0.18314673
## GDP -0.27749835 -0.27795855
## Population 0.28252928 0.27791337
## thinness..1.19.years 1.00000000 0.92791344
## thinness.5.9.years 0.92791344 1.00000000
## Income.composition.of.resources -0.45367885 -0.43848372
## Schooling -0.49119921 -0.47248203
## Income.composition.of.resources Schooling
## Year 0.122891780 0.08873179
## Life.expectancy 0.721082593 0.72763003
## Adult.Mortality -0.442203288 -0.42117052
## infant.deaths -0.134753863 -0.21437190
## Alcohol 0.561074332 0.61697481
## Hepatitis.B 0.184920970 0.21518159
## Measles -0.058277256 -0.11566048
## BMI 0.510504831 0.55484390
## under.five.deaths -0.148097276 -0.22601262
## Polio 0.314681594 0.35014660
## Total.expenditure 0.183653190 0.24378345
## Diphtheria 0.343261772 0.35039793
## HIV.AIDS -0.248589855 -0.21184020
## GDP 0.446855511 0.46794697
## Population -0.008132466 -0.04031242
## thinness..1.19.years -0.453678854 -0.49119921
## thinness.5.9.years -0.438483721 -0.47248203
## Income.composition.of.resources 1.000000000 0.78474058
## Schooling 0.784740581 1.00000000
# status vs. life expectancy
developing_expectancy <- lifeExpectancy %>% filter(Status == "Developing") %>% select(Life.expectancy)
developed_expectancy <- lifeExpectancy %>% filter(Status == "Developed") %>% select(Life.expectancy)
boxplot_developing <- ggplot(data=developing_expectancy) +
geom_boxplot(mapping=aes(x=Life.expectancy)) +
ggtitle("Life Expectancy for Developing Countries") +
xlab("Life Expectancy")
boxplot_developed <- ggplot(data=developed_expectancy) +
geom_boxplot(mapping=aes(x=Life.expectancy)) +
ggtitle("Life Expectancy for Developed Countries") +
xlab("Life Expectancy")
grid.arrange(boxplot_developing, boxplot_developed)
# convert country to factor
lifeExpectancy$Country <- as.factor(lifeExpectancy$Country)
# convert HDI from 0-1 to 0-100 for better interpretability of model estimates
lifeExpectancy$Income.composition.of.resources <- lifeExpectancy$Income.composition.of.resources * 100
# fit initial model
initialModel <- geeglm(formula = Life.expectancy ~ Status + Year + Adult.Mortality + Alcohol + BMI
+ HIV.AIDS + log(GDP) + thinness..1.19.years + thinness.5.9.years
+ Income.composition.of.resources + Schooling, id = Country,
data = lifeExpectancy, family = "gaussian")
summary(initialModel)$coefficients
## Estimate Std.err Wald
## (Intercept) 358.66456721 75.453919934 22.5950515
## StatusDeveloping -1.36926240 0.819073000 2.7946546
## Year -0.15247061 0.038030626 16.0733104
## Adult.Mortality -0.01748929 0.002305808 57.5305255
## Alcohol -0.18849973 0.085152151 4.9003850
## BMI 0.02743665 0.010028907 7.4843649
## HIV.AIDS -0.44441915 0.049598995 80.2859892
## log(GDP) 0.49802463 0.095407183 27.2483054
## thinness..1.19.years -0.03846752 0.068831247 0.3123324
## thinness.5.9.years -0.03477710 0.057004658 0.3721912
## Income.composition.of.resources 0.11102774 0.027585366 16.1996464
## Schooling 0.93039398 0.164142456 32.1286090
## Pr(>|W|)
## (Intercept) 1.999991e-06
## StatusDeveloping 9.457914e-02
## Year 6.093684e-05
## Adult.Mortality 3.330669e-14
## Alcohol 2.685071e-02
## BMI 6.223702e-03
## HIV.AIDS 0.000000e+00
## log(GDP) 1.789319e-07
## thinness..1.19.years 5.762524e-01
## thinness.5.9.years 5.418123e-01
## Income.composition.of.resources 5.700475e-05
## Schooling 1.442967e-08
# plot residuals
initialResid <- as.vector(initialModel$residuals)
mean_abs_resid_initial <- mean(abs(initialResid))
ggplot() +
geom_histogram(aes(initialResid)) +
ggtitle(label = "Residuals of initial model \n(independent correlation structure, gaussian)",
subtitle = paste("Mean absolute error: ", mean_abs_resid_initial))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# fit second model
secondModel <- geeglm(formula = Life.expectancy ~ Status + Year + Adult.Mortality + Alcohol + BMI +
HIV.AIDS + log(GDP) + Income.composition.of.resources + Schooling,
id = Country, data = lifeExpectancy, family = "gaussian")
summary(secondModel)$coefficients
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 362.17234816 74.85685334 23.408169 1.310210e-06
## StatusDeveloping -1.37665316 0.82975532 2.752637 9.709416e-02
## Year -0.15465680 0.03767976 16.846968 4.051801e-05
## Adult.Mortality -0.01753501 0.00229392 58.432646 2.098322e-14
## Alcohol -0.17648021 0.08527053 4.283454 3.848506e-02
## BMI 0.03410873 0.01044020 10.673663 1.086717e-03
## HIV.AIDS -0.44699537 0.04902540 83.131154 0.000000e+00
## log(GDP) 0.49770594 0.09523882 27.309744 1.733357e-07
## Income.composition.of.resources 0.11214319 0.02707574 17.154775 3.445422e-05
## Schooling 0.94413800 0.15991610 34.856724 3.548818e-09
# plot residuals
secondResid <- as.vector(secondModel$residuals)
mean_abs_resid_second <- mean(abs(secondResid))
ggplot() +
geom_histogram(aes(secondResid)) +
ggtitle(label = "Residuals of second model \n(independent correlation structure)",
subtitle = paste("Mean absolute error: ", mean_abs_resid_second))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# fit initial model
initialModel_ar1 <- geeglm(formula = Life.expectancy ~ Status + Year + Adult.Mortality + Alcohol + BMI
+ HIV.AIDS + log(GDP) + thinness..1.19.years + thinness.5.9.years
+ Income.composition.of.resources + Schooling, id = Country,
data = lifeExpectancy, corstr = "ar1", family = "gaussian")
summary(initialModel_ar1)$coefficients
## Estimate Std.err Wald
## (Intercept) -8.615082e+01 5.706060e+01 2.279534421
## StatusDeveloping -5.824818e+00 1.040977e+00 31.309973394
## Year 7.304077e-02 2.906636e-02 6.314649336
## Adult.Mortality -7.352253e-04 5.368132e-04 1.875834428
## Alcohol -5.622125e-03 2.282424e-02 0.060674809
## BMI 1.463369e-03 2.999144e-03 0.238074626
## HIV.AIDS -4.078279e-01 5.870129e-02 48.267915457
## log(GDP) 1.952508e-03 3.141030e-02 0.003864038
## thinness..1.19.years -4.008282e-02 3.277668e-02 1.495500857
## thinness.5.9.years 1.521806e-02 2.768839e-02 0.302080819
## Income.composition.of.resources 1.683596e-02 4.707713e-03 12.789572690
## Schooling 1.103759e+00 1.234923e-01 79.885624828
## Pr(>|W|)
## (Intercept) 1.310912e-01
## StatusDeveloping 2.199468e-08
## Year 1.197444e-02
## Adult.Mortality 1.708083e-01
## Alcohol 8.054325e-01
## BMI 6.256002e-01
## HIV.AIDS 3.717804e-12
## log(GDP) 9.504343e-01
## thinness..1.19.years 2.213649e-01
## thinness.5.9.years 5.825809e-01
## Income.composition.of.resources 3.485567e-04
## Schooling 0.000000e+00
# plot residuals
initialResid_ar1 <- as.vector(initialModel_ar1$residuals)
mean_abs_resid_initial_ar1 <- mean(abs(initialResid_ar1))
ggplot() +
geom_histogram(aes(initialResid_ar1)) +
ggtitle(label = "Residuals of initial model \n(AR1 correlation structure)",
subtitle = paste("Mean absolute error: ", mean_abs_resid_initial_ar1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# fit initial model
model2_ar1 <- geeglm(formula = Life.expectancy ~ Status + Year + HIV.AIDS
+ Income.composition.of.resources + Schooling, id = Country,
data = lifeExpectancy, corstr = "ar1", family = "gaussian")
summary(model2_ar1)$coefficients
## Estimate Std.err Wald
## (Intercept) -92.69412291 56.417504072 2.699459
## StatusDeveloping -5.95221369 1.031812649 33.277856
## Year 0.07619169 0.028783899 7.006739
## HIV.AIDS -0.40892008 0.060509651 45.669640
## Income.composition.of.resources 0.01685699 0.004582926 13.529264
## Schooling 1.11410328 0.122475875 82.746532
## Pr(>|W|)
## (Intercept) 1.003823e-01
## StatusDeveloping 7.988718e-09
## Year 8.120345e-03
## HIV.AIDS 1.399758e-11
## Income.composition.of.resources 2.348722e-04
## Schooling 0.000000e+00
# plot residuals
model2Resid_ar1 <- as.vector(model2_ar1$residuals)
mean_abs_resid_model2_ar1 <- mean(abs(model2Resid_ar1))
ggplot() +
geom_histogram(aes(model2Resid_ar1)) +
ggtitle(label = "Residuals of Model 2 \n(AR1 correlation structure)",
subtitle = paste("Mean absolute error: ", mean_abs_resid_model2_ar1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# fit independent model w/ same predictors as final AR-1 model to compare standard errors
testIndepModel <- geeglm(formula = Life.expectancy ~ Status + Year + HIV.AIDS
+ Income.composition.of.resources + Schooling,
id = Country, data = lifeExpectancy, family = "gaussian")
summary(testIndepModel)$coefficients
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 360.5058009 84.26742318 18.302275 1.884819e-05
## StatusDeveloping -1.2318027 0.97380582 1.600065 2.058940e-01
## Year -0.1556290 0.04253417 13.387679 2.532826e-04
## HIV.AIDS -0.6413125 0.08679507 54.594540 1.481038e-13
## Income.composition.of.resources 0.1434149 0.03599286 15.876533 6.761189e-05
## Schooling 1.1997387 0.20442289 34.444057 4.386801e-09
fitted_vals <- model2_ar1$fitted.values
actual_vals <- lifeExpectancy$Life.expectancy
actual_fitted_df <- as.data.frame(cbind(fitted_vals, actual_vals, model2Resid_ar1))
names(actual_fitted_df)[1] <- "fitted_vals"
names(actual_fitted_df)[3] <- "Residual"
ggplot(actual_fitted_df) +
geom_point(aes(x=actual_vals, fitted_vals, color=Residual)) +
ggtitle("Actual vs. Fitted Values for AR-1 Model 2") +
xlab("Actual Life Expectancy") +
ylab("Predicted Life Expectancy")
feature_names <- c("Development \n Status", "Year", "HIV/AIDS Deaths", "Human Development \n Index", "Years of Schooling")
abs_coeff <- abs(model2_ar1$coefficients[-1])
ggplot() +
geom_col(aes(x=feature_names, y=abs_coeff), fill = "dark blue") +
ggtitle("Feature Importance of AR-1 Model 2") +
xlab("Feature") +
ylab("Absolute Value of Coefficient")
kmeans_vars <- continuous_vars[, c(-1, -2, -18)]
k_vec <- 3:30
tot_within_SS <- c()
# Cross-Validate to find the best K, which minimizes the total within cluster sum of squares
for (k in k_vec) {
clustering_k <- kmeans(kmeans_vars, centers=k, nstart=10, iter.max=100)
tot_within_SS <- c(tot_within_SS, clustering_k$tot.withinss)
}
# plot K vs. total within-cluster SS
ggplot() +
geom_line(aes(x=k_vec, y=tot_within_SS), color="blue", lwd = 1) +
ggtitle("Total Within-Cluster Sum of Squares for K = (3, 4, ..., 30)") +
xlab("K") +
ylab("Total Within-Cluster SS")
# perform final clustering
kmeans10 <- kmeans(kmeans_vars, centers=10, nstart=10)
new_clusters <- factor(kmeans10$cluster)
HDI <- lifeExpectancy$Income.composition.of.resources
# get countries belonging to each cluster
lifeExpectancy$cluster <- new_clusters
countries_cluster <- lifeExpectancy %>% group_by(cluster) %>% summarise(countries = str_c(unique(Country), collapse = ", "))
ggplot() +
geom_boxplot(aes(x=new_clusters, y=HDI, fill=new_clusters)) +
ggtitle("Human Development Index of New Cluster Members") +
xlab("Cluster")
# fit initial model
initialModel_clusters <- geeglm(formula = Life.expectancy ~ Status + Year + Adult.Mortality + Alcohol + BMI
+ HIV.AIDS + log(GDP) + thinness..1.19.years + thinness.5.9.years
+ Income.composition.of.resources + Schooling, id = cluster,
data = lifeExpectancy, corstr = "unstructured", family = "gaussian")
summary(initialModel_clusters)$coefficients
## Estimate Std.err Wald
## (Intercept) 412.836364926 3.869385e+02 1.1383400
## StatusDeveloping -1.473929273 7.888183e-01 3.4913980
## Year -0.181148442 1.923979e-01 0.8864789
## Adult.Mortality -0.008920334 2.079018e-03 18.4096550
## Alcohol -0.236824951 7.681688e-02 9.5047690
## BMI 0.029508709 2.773239e-02 1.1322070
## HIV.AIDS -0.577117408 6.453359e-02 79.9754470
## log(GDP) 0.316827281 3.834731e-01 0.6826143
## thinness..1.19.years -0.048453554 6.154738e-02 0.6197723
## thinness.5.9.years -0.052582088 6.310708e-02 0.6942558
## Income.composition.of.resources 0.123730217 6.308320e-02 3.8470175
## Schooling 1.169007353 3.651510e-01 10.2491891
## Pr(>|W|)
## (Intercept) 0.2860033650
## StatusDeveloping 0.0616884688
## Year 0.3464332873
## Adult.Mortality 0.0000178153
## Alcohol 0.0020493855
## BMI 0.2873050751
## HIV.AIDS 0.0000000000
## log(GDP) 0.4086879505
## thinness..1.19.years 0.4311318977
## thinness.5.9.years 0.4047205777
## Income.composition.of.resources 0.0498345339
## Schooling 0.0013674470
# plot residuals
initialResid_clusters <- as.vector(initialModel_clusters$residuals)
mean_abs_resid_initial_clusters <- mean(abs(initialResid_clusters))
ggplot() +
geom_histogram(aes(initialResid_clusters)) +
ggtitle(label = "Residuals of initial model \n(unstructured correlation structure, clusters from K-Means)",
subtitle = paste("Mean absolute error: ", mean_abs_resid_initial_clusters))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(initialModel_ar1)$coefficients
## Estimate Std.err Wald
## (Intercept) -8.615082e+01 5.706060e+01 2.279534421
## StatusDeveloping -5.824818e+00 1.040977e+00 31.309973394
## Year 7.304077e-02 2.906636e-02 6.314649336
## Adult.Mortality -7.352253e-04 5.368132e-04 1.875834428
## Alcohol -5.622125e-03 2.282424e-02 0.060674809
## BMI 1.463369e-03 2.999144e-03 0.238074626
## HIV.AIDS -4.078279e-01 5.870129e-02 48.267915457
## log(GDP) 1.952508e-03 3.141030e-02 0.003864038
## thinness..1.19.years -4.008282e-02 3.277668e-02 1.495500857
## thinness.5.9.years 1.521806e-02 2.768839e-02 0.302080819
## Income.composition.of.resources 1.683596e-02 4.707713e-03 12.789572690
## Schooling 1.103759e+00 1.234923e-01 79.885624828
## Pr(>|W|)
## (Intercept) 1.310912e-01
## StatusDeveloping 2.199468e-08
## Year 1.197444e-02
## Adult.Mortality 1.708083e-01
## Alcohol 8.054325e-01
## BMI 6.256002e-01
## HIV.AIDS 3.717804e-12
## log(GDP) 9.504343e-01
## thinness..1.19.years 2.213649e-01
## thinness.5.9.years 5.825809e-01
## Income.composition.of.resources 3.485567e-04
## Schooling 0.000000e+00
summary(initialModel_clusters)$coefficients
## Estimate Std.err Wald
## (Intercept) 412.836364926 3.869385e+02 1.1383400
## StatusDeveloping -1.473929273 7.888183e-01 3.4913980
## Year -0.181148442 1.923979e-01 0.8864789
## Adult.Mortality -0.008920334 2.079018e-03 18.4096550
## Alcohol -0.236824951 7.681688e-02 9.5047690
## BMI 0.029508709 2.773239e-02 1.1322070
## HIV.AIDS -0.577117408 6.453359e-02 79.9754470
## log(GDP) 0.316827281 3.834731e-01 0.6826143
## thinness..1.19.years -0.048453554 6.154738e-02 0.6197723
## thinness.5.9.years -0.052582088 6.310708e-02 0.6942558
## Income.composition.of.resources 0.123730217 6.308320e-02 3.8470175
## Schooling 1.169007353 3.651510e-01 10.2491891
## Pr(>|W|)
## (Intercept) 0.2860033650
## StatusDeveloping 0.0616884688
## Year 0.3464332873
## Adult.Mortality 0.0000178153
## Alcohol 0.0020493855
## BMI 0.2873050751
## HIV.AIDS 0.0000000000
## log(GDP) 0.4086879505
## thinness..1.19.years 0.4311318977
## thinness.5.9.years 0.4047205777
## Income.composition.of.resources 0.0498345339
## Schooling 0.0013674470